import json
import pymysql
from typing import List

# 字段映射（tag 字段已合并进来）
UNIVERSITY_FIELD_MAP = {
    "name": "name",
    "displayName": "display_name",
    "province": "province",
    "city": "city",
    "cityLevel": "city_level",
    "startYear": "creation_year",
    "category": "category",
    "type": "type",
    "belong": "belong",
    "belongType": "belong_type",
    "belongTypeText": "belong_type_text",
    "ownerType": "establishment_type_code",
    "ownerTypeText": "establishment_typ",
    "sorted": "sorted",
    "masterCount": "master_count",
    "doctorCount": "doctor_count",
    "coreCount": "core_count",
    "ylCount": "first_class_discipline_count",
    "dscpgCount": "double_first_class_discipline_count",
    "logoUrl": "logo_url",
    "baikeUrl": "baike_url",
    "enrollUrl": "enroll_url",
    "yggkUrl": "yggk_url",
    "vrUrl": "vr_url",
    "rkOrder": "rk"
}

# 标签字段映射（并入 universities 表）
TAG_FIELD_MAP = {
    "t211": "tag_211",
    "t985": "tag_985",
    "tshuangyiliu": "tag_double_first_class",
    "tqiangji": "tag_strong_foundation",
    "tzhongdian": "tag_key_university",
    "tgongjian": "tag_co_constructed",
    "ttese": "tag_characteristic",
    "tshuanggao": "tag_high_level"
}


def insert_universities_from_file(file_path: str, conn):
    with open(file_path, "r", encoding="utf-8") as f:
        json_data = json.load(f)
        insert_universities(json_data, conn)


def insert_universities(json_data: List[dict], conn):
    with conn.cursor() as cursor:
        for item in json_data:
            # universities 表字段（去除 version 和 id）
            uni_data = {}

            for k, v in UNIVERSITY_FIELD_MAP.items():
                if k in item:
                    uni_data[v] = item[k]

            for k, v in TAG_FIELD_MAP.items():
                if k in item:
                    uni_data[v] = item[k]

            if not uni_data:
                continue  # 无有效字段，跳过

            uni_columns = ', '.join(uni_data.keys())
            uni_placeholders = ', '.join(['%s'] * len(uni_data))
            sql_uni = f"INSERT INTO universities ({uni_columns}) VALUES ({uni_placeholders})"
            cursor.execute(sql_uni, list(uni_data.values()))

        conn.commit()
    print("✔ 所有数据已成功导入 universities 表！")


def get_connection():
    return pymysql.connect(
        host="localhost",
        port=3306,
        user="arch",
        password="root",
        database="NCEE",
        charset="utf8mb4",
        cursorclass=pymysql.cursors.DictCursor
    )


if __name__ == "__main__":
    file_path = "/home/zhao/projects/python/spider/data/colleges.json"
    conn = get_connection()
    try:
        insert_universities_from_file(file_path, conn)
    finally:
        conn.close()
